#!/usr/bin/env python3
#
# An OBS Source Service to retrieve and verify Go module sources
# as specified in go.mod and go.sum.
#
# (C) 2019 SUSE LLC
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.
#
"""\
OBS Source Service to download, verify and vendor Go module
dependency sources. Using go.mod and go.sum present in a Go
application, call go tools in sequence:

go mod download
go mod verify
go mod vendor

obs-service-go_modules will create a vendor tarball, compressed with
the specified method (default to "gz"), containing the
vendor/ directory populated by go mod vendor.

See README.md for additional documentation.
"""

import logging
import argparse
import re
import libarchive
import os
import shutil
import sys
import tempfile

from pathlib import Path
from subprocess import run

# Name of this source service; used as the logger name and in the startup
# log message.
app_name = "obs-service-go_modules"

# The module docstring doubles as the argparse description shown by --help.
description = __doc__

# Compression used for the vendor archive when --compression is not given.
DEFAULT_COMPRESSION = "gz"

# Module-level logger. The functions in this file log through the global
# name `log`; binding it at import time (and not only under the __main__
# guard) keeps them callable when the module is imported, e.g. from tests.
log = logging.getLogger(app_name)


def get_archive_parameters(args):
    """Translate the --compression option into libarchive parameters.

    Only ``args.compression`` is read from the parsed argument namespace.

    Returns a tuple ``(archive_format, archive_compression, archive_extension)``:

    - "obscpio" -> ("cpio_newc", None, "obscpio")
    - "tar"     -> ("gnutar", None, "tar")
    - otherwise -> ("gnutar", <filter>, "tar.<compression>"), where the
      short names "gz" and "zst" are mapped to the filter names "gzip"
      and "zstd" and any other value is used as the filter name as-is.

    Logs an error and exits with status 1 when the resulting filter name
    is not listed in ``libarchive.ffi.READ_FILTERS``.
    """
    # NOTE(review): availability is checked against libarchive's READ_* sets
    # although the values describe an archive to be written - confirm
    # whether the WRITE_* sets would be the more accurate check.
    compression = args.compression

    if compression == "obscpio" and "cpio" in libarchive.ffi.READ_FORMATS:
        return "cpio_newc", None, compression

    if compression == "tar" and "tar" in libarchive.ffi.READ_FORMATS:
        return "gnutar", None, compression

    # Everything else is a (possibly compressed) GNU tar archive.
    filter_aliases = {"gz": "gzip", "zst": "zstd"}
    compression_format = filter_aliases.get(compression, compression)

    if compression_format not in libarchive.ffi.READ_FILTERS:
        log.error(
            f'The specified compression mode is not supported: "{args.compression}"'
        )
        # sys.exit() rather than the interactive-shell helper exit(), which
        # is only injected by the site module.
        sys.exit(1)

    return "gnutar", compression_format, "tar." + compression


def basename_from_archive_name(archive_name):
    """Derive the base name from an archive file name.

    An optional ``_service:<name>:`` prefix and a trailing ``.obscpio`` or
    ``.tar.<ext>`` extension are stripped, e.g.
    ``_service:tar_scm:app-1.0.tar.gz`` becomes ``app-1.0``.

    A name that does not match this pattern is returned unchanged.
    """
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    basename = re.sub(
        r"^(?P<service_prefix>_service:[^:]+:)?(?P<basename>.*)\.(?P<extension>obscpio|tar\.[^\.]+)$",
        r"\g<basename>",
        archive_name,
    )
    if basename:
        log.info(f"Detected basename {basename} from archive name")
    return basename


def basename_from_archive(archive_name):
    """Derive the base name from the directory entries of an archive.

    Reads the archive with libarchive, collects the names of all entries
    flagged as directories and returns their common path.

    Returns None when os.path.commonpath() raises ValueError, which it
    does for an empty list (no directory entries) or a mix of absolute
    and relative names.
    """
    with libarchive.file_reader(archive_name) as reader:
        # Entry names are read while iterating the archive.
        directories = [member.name for member in reader if member.isdir]

    try:
        common_root = os.path.commonpath(directories)
    except ValueError:
        return None

    log.info(f"Detected basename {common_root} from archive")
    return common_root


def archive_autodetect():
    """Find the most likely source archive in the current working directory.

    The highest-sorted ``*.dsc`` file in the current directory provides the
    stem. The archive is the highest-sorted file matching the first of these
    patterns that yields a result:

    - <stem>*.tar.*
    - <stem>*.obscpio
    - _service:*:<stem>*tar.*
    - _service:*:<stem>*obscpio

    The value of the last ``Version:`` line of the .dsc file is compared
    with the archive file name; a warning is logged when no such line
    exists or the version is not a substring of the archive name.

    Returns the archive file name (name only, not a path) as str.
    Logs an error and exits with status 1 when no .dsc file or no matching
    archive is found.
    """
    log.info("Autodetecting archive since no archive param provided in _service")
    cwd = Path.cwd()
    # Bound up front so the "no archive" check below also covers the case
    # where no .dsc file exists.
    archive = None
    # highest sorted .dsc under cwd or None
    spec = max(cwd.glob("*.dsc"), default=None)
    if spec is None:
        log.debug(f"Archive autodetection found no spec or dsc file under {cwd}")
    else:
        spec_dir = spec.parent  # typically the same as cwd
        spec_stem = spec.stem  # stem is app in app.dsc
        patterns = [
            f"{spec_stem}*.tar.*",
            f"{spec_stem}*.obscpio",
            f"_service:*:{spec_stem}*tar.*",
            f"_service:*:{spec_stem}*obscpio",
        ]
        for pattern in patterns:
            log.debug(f"Trying to find archive name with pattern {pattern}")
            # highest sorted archive under spec_dir or None
            archive = max(spec_dir.glob(pattern), default=None)
            if archive is not None:
                break

    if archive is None:
        log.error("Archive autodetection found no matching archive")
        sys.exit(1)

    log.info(f"Archive autodetected at {archive}")
    # Check that the Version: directive value is a substring of the
    # detected archive filename.
    # Warn if there is disagreement between the versions.
    version = None
    version_re = re.compile(r"^Version:\s+([\S]+)$", re.IGNORECASE)
    with spec.open(encoding="utf-8") as f:
        for line in f:
            versionmatch = version_re.match(line)
            if versionmatch:
                # No break: the last matching line wins.
                version = versionmatch.group(1)

    if not version:
        log.warning(f"Version not found in {spec.name}")
    elif version not in archive.name:
        log.warning(f"Version {version} in {spec.name} does not match {archive.name}")
    return str(archive.name)  # return string not PosixPath


def extract(filename, outdir):
    """Extract the archive ``filename`` into the directory ``outdir``.

    libarchive.extract_file() is called without a destination, so the
    process changes into ``outdir`` for the duration of the extraction and
    returns to the previous working directory afterwards, also when the
    extraction fails.

    Logs the error and exits with status 1 on libarchive ArchiveError.
    """
    log.info(f"Extracting {filename} to {outdir}")

    cwd = os.getcwd()

    # make path absolute so we can switch away from the current working directory
    filename = os.path.join(cwd, filename)

    log.info(f"Switching to {outdir}")
    os.chdir(outdir)

    try:
        libarchive.extract_file(filename)
    except libarchive.exception.ArchiveError as archive_error:
        log.error(archive_error)
        sys.exit(1)
    finally:
        # Restore the working directory on every exit path.
        os.chdir(cwd)


def cmd_go_mod(cmd, dir):
    """Execute a ``go mod`` subcommand using subprocess.run().

    ``cmd`` is the subcommand name (e.g. "download") and ``dir`` is the
    directory the command is run in.

    stdout and stderr are both captured as text. When the command returns
    a non-zero exit status, its stderr is logged as an error.

    Returns the CompletedProcess object to the caller for control flow.
    """
    from subprocess import PIPE

    log.info(f"go mod {cmd}")

    # Start from the current environment so PATH, HOME, proxy settings etc.
    # stay available to the go tool, then force the Go-specific settings.
    go_env = dict(os.environ)
    go_env.update(
        {
            "GOPROXY": "https://goproxy.cn",
            "GOMODCACHE": "/tmp/gocache",
            "GOPATH": "/tmp/gopath",
        }
    )

    # stdout/stderr=PIPE with universal_newlines=True captures text output
    # on every Python 3 version, so cp.stderr is always a str and no
    # version-specific branch is needed.
    cp = run(
        ["go", "mod", cmd],
        cwd=dir,
        stdout=PIPE,
        stderr=PIPE,
        universal_newlines=True,
        env=go_env,
    )
    if cp.returncode:
        log.error(cp.stderr.strip())
    return cp


def sanitize_subdir(dir, subdir):
    """Normalize ``subdir`` and make sure it is located inside ``dir``.

    Both paths are normalized with os.path.normpath() before they are
    compared, so ".." components in ``subdir`` are resolved and a trailing
    separator on ``dir`` does not cause a false rejection.

    Returns the normalized ``subdir``. Logs an error and exits with
    status 1 when it is not located below ``dir``.
    """
    base = os.path.normpath(dir)
    ret = os.path.normpath(subdir)
    try:
        is_inside = os.path.commonpath([base, ret]) == base
    except ValueError:
        # commonpath() refuses to compare absolute with relative paths (or
        # paths on different drives); such a path is not below ``dir``.
        is_inside = False

    if is_inside:
        return ret
    log.error(f"Invalid path: {ret} not subdir of {dir}")
    sys.exit(1)


def main():
    """Command-line entry point of the source service.

    Parses the options --strategy, --archive, --outdir, --compression,
    --basename and --subdir, extracts the source archive (autodetected
    when --archive is not given) into a temporary directory and locates
    go.mod below ``<basename>[/<subdir>]``.

    For the "vendor" strategy it runs ``go mod download``, ``go mod vendor``
    and ``go mod verify`` in the go.mod directory and writes the resulting
    vendor/ directory to ``<outdir>/vendor.<extension>``. Any other
    strategy value performs no work after go.mod has been located.

    Exits with status 1 when --outdir is missing for the vendor strategy,
    when go.mod is not found, or when a go mod step fails (a failing
    ``go mod download`` that reports "invalid version" only logs warnings).
    """
    log.info(f"Running OBS Source Service: {app_name}")

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--strategy", default="vendor")
    parser.add_argument("--archive")
    parser.add_argument("--outdir")
    parser.add_argument("--compression", default=DEFAULT_COMPRESSION)
    parser.add_argument("--basename")
    parser.add_argument("--subdir")
    args = parser.parse_args()

    outdir = args.outdir
    subdir = args.subdir

    if args.strategy == "vendor" and not outdir:
        # Fail before any extraction or download work instead of crashing
        # later when the output path is built.
        log.error("The --outdir parameter is required for the vendor strategy")
        sys.exit(1)

    archive_format, archive_compression, archive_ext = get_archive_parameters(args)
    vendor_tarname = f"vendor.{archive_ext}"
    archive = args.archive or archive_autodetect()
    log.info(f"Using archive {archive}")

    with tempfile.TemporaryDirectory() as tempdir:
        extract(archive, tempdir)

        basename = (
            args.basename
            or basename_from_archive(archive)
            or basename_from_archive_name(archive)
        )

        # <tempdir>/<basename>[/<subdir>]/go.mod, rejected when it would
        # point outside of tempdir.
        go_mod_parts = [tempdir, basename]
        if subdir:
            go_mod_parts.append(subdir)
        go_mod_parts.append("go.mod")
        go_mod_path = sanitize_subdir(tempdir, os.path.join(*go_mod_parts))

        if go_mod_path and os.path.exists(go_mod_path):
            go_mod_dir = os.path.dirname(go_mod_path)
            log.info(f"Using go.mod found at {go_mod_path}")
        else:
            log.error(f"File go.mod not found under {os.path.join(tempdir, basename)}")
            sys.exit(1)

        if args.strategy == "vendor":
            # go subcommand sequence:
            # - go mod download
            #   (is sensitive to invalid module versions, try and log warn if fails)
            # - go mod vendor
            #   (also downloads but use separate steps for visibility in OBS environment)
            # - go mod verify
            #   (validates checksums)

            # return value cp is type subprocess.CompletedProcess
            cp = cmd_go_mod("download", go_mod_dir)
            if cp.returncode:
                # stderr may be None when the output was not captured.
                if "invalid version" in (cp.stderr or ""):
                    log.warning(
                        "go mod download is more sensitive to invalid module versions than go mod vendor"
                    )
                    log.warning(
                        "if go mod vendor and go mod verify complete, vendoring is successful"
                    )
                else:
                    log.error("go mod download failed")
                    sys.exit(1)

            cp = cmd_go_mod("vendor", go_mod_dir)
            if cp.returncode:
                log.error("go mod vendor failed")
                sys.exit(1)

            cp = cmd_go_mod("verify", go_mod_dir)
            if cp.returncode:
                log.error("go mod verify failed")
                sys.exit(1)

            log.info(f"Vendor go.mod dependencies to {vendor_tarname}")
            # Resolve the output path before changing directory so that a
            # relative --outdir still refers to the caller's directory and
            # not to the temporary source tree.
            vendor_tarfile = os.path.abspath(os.path.join(outdir, vendor_tarname))
            vendor_dir = "vendor"

            # Archive from inside go_mod_dir so entries are stored with the
            # relative prefix "vendor/".
            cwd = os.getcwd()
            os.chdir(go_mod_dir)
            try:
                with libarchive.file_writer(
                    vendor_tarfile, archive_format, archive_compression
                ) as new_archive:
                    new_archive.add_files(vendor_dir)
            finally:
                os.chdir(cwd)


if __name__ == "__main__":
    # Script entry point: configure the root logger to emit messages of
    # level DEBUG and above.
    logging.basicConfig(level=logging.DEBUG)
    # The functions in this file log through the module-global name `log`,
    # so it has to be bound before main() is called.
    log = logging.getLogger(app_name)
    main()
